## -------------------------
## Analyze runtime test data
## -------------------------
library(tidyverse)
library(igraph)
library(doMC)
library(gridExtra)
library(grid)
ben_theme <- function(){
    theme_classic() +
        theme(panel.background = element_blank(),
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              axis.line = element_line(colour = "black"),
              panel.border = element_rect(colour = "black", fill = NA, size = 1),
              strip.background = element_blank(),
              legend.position = "bottom", legend.title = element_blank(),
              plot.title = element_text(hjust = 0.5),
              plot.subtitle = element_text(hjust = .5),
              axis.title = element_text(size = 14))
}

## -----------
## Actual maps
## -----------
## params <- expand.grid(
##     nprecs = seq(25, 200, by = 25),
##     ndists = 2:3,
##     alg = c("enumpart"),
##     nsims = 1:50,
##     stringsAsFactors = FALSE
## )
params <- expand.grid(
    nprecs = seq(40, 200, by = 40),
    ndists = c(2, 5, 10),
    alg = c("enumpart"),
    nsims = 1:25,
    stringsAsFactors = FALSE
)
mem <- read_table("../data/runtime_test/runtime_test_memoryusage.txt")
mem <- subset(mem, JobName == "Rscript")
mem_time <- strsplit(mem$CPUTime, split = ":")

## Format
mem$runtime <- as.numeric(unlist(lapply(mem_time, `[[`, 1))) * 60 * 60 +
    as.numeric(unlist(lapply(mem_time, `[[`, 2))) * 60 +
    as.numeric(unlist(lapply(mem_time, `[[`, 3)))
mem$runtime[is.na(mem$runtime)] <- 24*60*60
mem$memusage <- as.numeric(gsub("K", "", mem$MaxRSS))
params <- params[1:nrow(mem),]
params$memusage <- mem$memusage
params$runtime <- mem$runtime
params$ndists <- paste(params$ndists, "Districts")
params$alg <- case_when(params$alg == "redist"~"redist.enumerate()",
                        TRUE~params$alg)
params$failed <- case_when(params$memusage/1e6 > 170~"Ran Out of Memory\n(180GB Limit)",
                           TRUE~"Finished Building ZDD")

## Load maps and calculate graph statistics
registerDoMC(detectCores())
graph_stats <- foreach(i = 1:nrow(params), .combine = "rbind") %dopar% {

    ## Load map
    map <- readLines(paste0("../data/runtime_test/adjlist_map_ordered_", i, ".dat"))
    map <- do.call("rbind", strsplit(map, " "))
    map_cln <- cbind(as.numeric(map[,1]), as.numeric(map[,2]))
    igr <- graph_from_edgelist(map_cln, directed = FALSE)

    ## Calculate frontier
    outp <- system(paste0("python ../enumpart_private/frontier_size/calc_frontier_size.py < ../data/runtime_test/adjlist_map_ordered_", i, ".dat"), intern = TRUE)

    ## Calculate stats
    maxf <- parse_number(outp[1])
    avgf <- parse_number(outp[2])
    avgf_sq <- as.numeric(strsplit(outp[3], " ")[[1]][5])
    
    ## Calculate summary stats
    return(data.frame(
        mapnum = i,
        num_edges = gsize(igr),
        diameter = diameter(igr, directed = FALSE),
        mean_distance = mean_distance(igr, directed = FALSE),
        edge_density = edge_density(igr),
        transitivity = transitivity(igr),
        max_frontier = maxf,
        avg_frontier = avgf,
        avgsq_frontier = avgf_sq
    ))                
    
}
params <- bind_cols(params, graph_stats) %>%
    filter(runtime != 86400) %>%
    drop_na(memusage)

params <- params %>%
    mutate(memusage_scaled = ifelse(memusage>180000000,180,memusage/1e6),
           ndists = factor(ndists, levels = c("2 Districts", "5 Districts",
                                              "10 Districts"))) 

## All as one
runtime <- params %>% 
     ggplot(aes(as.factor(nprecs), runtime/60/60, group = as.factor(nprecs),
                   pch = failed)) +
    geom_jitter() +
    facet_wrap(~ ndists, ncol = 1, strip.position = "left") +
    scale_shape_manual(values = c(3, 1)) +
    labs(x = "Number of Precincts in Map",
         y = "Runtime (hours)",
         title = "Runtime Scalability") +  
    ben_theme() +
    theme(legend.position = c(.32, .92),
          legend.text = element_text(size = 10),
          strip.placement = "outside",
          strip.text = element_text(size = 12)) +
    guides(size = 'none')
memuse <- params %>% 
    ggplot(aes(as.factor(nprecs), memusage_scaled, group = as.factor(nprecs),
               pch = failed)) +
    geom_jitter() +
    facet_wrap(~ ndists, ncol = 1, strip.position = "left") + 
    geom_hline(aes(yintercept = 180), lty = "dashed", colour = "red") +
    scale_shape_manual(values = c(3, 1)) +
    annotate("text", x = 1.25, 190, label = "Memory Limit", colour = "red") +
    labs(x = "Number of Precincts in Map",
         y = "Memory Usage (GB)",
         title = "Memory Usage Scalability") + 
    ben_theme() +
    theme(legend.position = "none",
          strip.background = element_blank(),
          strip.text.y = element_blank())
frontier <- params %>% 
    ggplot(aes(max_frontier, memusage_scaled, pch = failed)) +
    geom_jitter() +
    facet_wrap(~ ndists, ncol = 1, strip.position = "left") + 
    geom_hline(aes(yintercept = 180), lty = "dashed", colour = "red") +
    scale_shape_manual(values = c(3, 1)) + 
    annotate("text", x = 7.5, 190, label = "Memory Limit", colour = "red") +
    labs(x = "Max Frontier Size of Graph",
         y = "Memory Usage (GB)",
         title = "Memory Usage Scales with Frontier Size") + 
    ben_theme() +
    theme(legend.position = "none",
          strip.background = element_blank(),
          strip.text.y = element_blank())

pdf(file = "../paper/figs/scalability_analysis.pdf", height = 6, width = 12)
grid.arrange(runtime, memuse, frontier, nrow = 1)
dev.off()

## Plot - runtime
runtime_lim <- 24*60*60
params %>% drop_na(memusage) %>%
    ggplot(aes(as.factor(nprecs), runtime/60/60, group = as.factor(nprecs),
                   colour = failed)) +
    geom_jitter(width = .15) + 
    facet_grid(~ ndists) +
    scale_colour_manual(values = c("black", "red")) + 
    labs(x = "Number of Precincts in Map",
         y = "Runtime (hours)",
         title = "Runtime of enumpart on Subsets of NH Map") +  
    ben_theme() +
    theme(legend.position = c(.125, .825)) + 
    ggsave("../paper/figs/maps_runtime.pdf", height = 4, width = 8)

## Plot - memory usage
params %>% drop_na(memusage) %>%
    ggplot(aes(as.factor(nprecs), memusage/1e6, group = as.factor(nprecs))) +
    geom_jitter(width = .15) + 
    geom_hline(aes(yintercept = 256), lty = "dashed", colour = "red") +
    annotate("text", x = 2.25, 265, label = "256GB Memory Limit", colour = "red") +
    facet_grid(~ ndists) + 
    labs(x = "Number of Precincts in Map",
         y = "Memory Usage (GB)",
         title = "Memory Usage of enumpart on Subsets of NH Map") + 
    ben_theme() +
    ggsave("../paper/figs/maps_memuse.pdf", height = 4, width = 8)

## Plot - memory usage against frontier
params %>% drop_na(memusage) %>%
    ggplot(aes(avg_frontier, memusage/1e6)) +
    geom_point() + 
    geom_hline(aes(yintercept = 256), lty = "dashed", colour = "red") +
    annotate("text", x = 4.5, 265, label = "256GB Memory Limit", colour = "red") +
    facet_grid(~ ndists) +
    labs(x = "Average Frontier Size of Graph",
         y = "Memory Usage (GB)",
         title = "Memory Usage of enumpart Scales with the Graph Frontier Size") + 
    ben_theme() +
    ggsave("../paper/figs/maps_frontier_memuse.pdf", height = 4, width = 8)

## -------
## Lattice
## -------
params <- expand.grid(
    lat_size = seq(3, 15, by = 1),
    ndists = 2:3,
    alg = c("enumpart"),
    stringsAsFactors = FALSE
)
mem <- read_table("../data/runtime_test_lattice/runtime_test_memoryusage.txt")
mem <- subset(mem, JobName == "Rscript")
mem_time <- strsplit(mem$CPUTime, split = ":")

## Format
mem$runtime <- as.numeric(unlist(lapply(mem_time, `[[`, 1))) * 60 * 60 +
    as.numeric(unlist(lapply(mem_time, `[[`, 2))) * 60 +
    as.numeric(unlist(lapply(mem_time, `[[`, 3)))
mem$memusage <- as.numeric(gsub("K", "", mem$MaxRSS))
params <- params[1:nrow(mem),]
params$memusage <- mem$memusage
params$runtime <- mem$runtime
params$ndists <- paste(params$ndists, "Districts")
params$alg <- case_when(params$alg == "redist"~"redist.enumerate()",
                        TRUE~params$alg)
params$lat_size <- as.factor(paste0(params$lat_size, "x", params$lat_size))
params$lat_size <- factor(params$lat_size,
                          levels = levels(params$lat_size)[c(7:13, 1:6)])

## Plot - runtime
params %>% filter(ndists == "2 Districts") %>%
    ggplot(aes(as.factor(lat_size), runtime, group = 1)) +
    geom_point() + geom_line() + 
    labs(x = "Number of Precincts in Map",
         y = "Runtime (seconds)",
         title = "Runtime Comparison of enumpart on 2-District Lattice Map") +  
    ben_theme()

## Plot - memory usage
params %>% filter(ndists == "2 Districts") %>%
    ggplot(aes(as.factor(lat_size), memusage/1e6, group = 1)) +
    geom_point(size = 3) + geom_line(lwd = 1.5) +
    geom_hline(aes(yintercept = 256), lty = "dashed", colour = "red") +
    annotate("text", x = 2.25, 265, label = "256GB Memory Limit", colour = "red") + 
    labs(x = "Size of Lattice",
         y = "Memory Usage (GB)",
         title = "Memory Usage of enumpart on 2-District Lattice Map") + 
    ben_theme() +
    ggsave("../output/lattice_memuse.pdf", height = 4, width = 6.5)
